---
output: pdf_document
title: "Appendix D: Belt and Road and UNGA Voting"
geometry: margin=1in
mainfont: cochineal
fontsize: 11pt
linestretch: 1.15
endnote: no
sansitup: no
graphics: yes
toc: yes

header-includes:
- \usepackage{float} #use the 'float' package
- \floatplacement{figure}{H} #make every figure with caption = h
- \usepackage{graphicx}
- \usepackage{longtable}
- \LTcapwidth=.95\textwidth
- \linespread{1.05}
- \usepackage{hyperref}
- \usepackage{booktabs}
- \usepackage{subfig}
- \renewcommand{\figurename}{Figure B.}
- \makeatletter
- \def\fnum@figure{\figurename\thefigure}
- \makeatother

subtitle: 'Cluster analysis'

---


```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = F, warning = F, comment = F, message = F, fig.show = 'hide', fig.ncol = 1, fig.align = "center")

library(tidyverse)
library(giscoR)
library(zoo)
library(gsynth)
library(countrycode)
library(readxl)
library(car)
library(stargazer)
library(sandwich)
library(lmtest)

```


# Analysis of China


```{r}
# Load data
data_china <- read_csv2("./replication_files/data/data/final_data/china_data_with_cluster.csv") %>% filter(year > 2003)
data_china %>% distinct(ctry1, cluster) %>% arrange(cluster)

# Explore scale of Ideal Point distances to China
range(data_china$IdealPointDistance, na.rm = T)

#### Reproduces Figure 3 in the manuscript ####
#pdf(file= "./graphs/china/idealpoints_dist.pdf", width=10, height=6)
plot(density(data_china$IdealPointDistance, na.rm = T), main = "", 
     xlim = c(0, 4))
abline(v = 3.362847, lty=2)
abline(v = 0.6225892, lty=2)
abline(v = 1.599358, lty=2)
abline(v = 2.176487, lty=2)
text(x = 0.6225892, y = 0.9, "RUSSIA\n2018", cex = 1.1)
text(x = 3.362847, y = 0.9, "US\n2018", cex = 1.1)
text(x = 1.599358, y = 0.9, "ITALY\n2018", cex = 1.1)
text(x = 2.176487, y = 0.9, "UK\n2018", cex = 1.1)
#dev.off()

```

## Cluster 1

```{r}
df <- data_china %>% drop_na(IdealPointDistance)

df <- df %>% drop_na(polity2_dist, gdp_cap_dist)

df <- df %>% group_by(ctry1) %>% mutate(n = n()) 


df_sub <- df %>% filter(cluster == 1)
table <- df_sub %>% select(ctry1) %>% distinct()
```


```{r}
#Pre-treatment trends
ggplot(df_sub %>% filter(year %in% c(2004:2013)), aes(year, IdealPointDistance, color = ctry1)) +
  geom_line() +
  theme_minimal() +
  theme(legend.position = 'none') +
  
  labs(x = 'Year')


#ggsave('./graphs/china/pre_treatment1.pdf', width = 7, height = 3)


```


```{r, }
#### Reproduces Figure A.5 ####
m1 <- gsynth(IdealPointDistance ~ treated + ctry1_fariss + log(ctry1_gdp_cap) + ctry1_polity2, 
             data = df_sub, 
             index = c("ctry1", "year"),
             se = T,
             force = 'time',
            # CV = TRUE,
             r = c(0, 5),
             inference = "parametric", nboots = 1000,
             parallel = FALSE)



results <- as.tibble(m1$est.att) %>%
  mutate(n = row_number()) %>%
  mutate(ci_90_low = ATT - 1.645 * S.E., 
         ci_90_high = ATT + 1.645 * S.E., 
         n = n-10.5)


ggplot(results, aes(n, ATT)) +
  geom_hline(yintercept = 0, lty = 2, color = "gray60") +
  geom_vline(xintercept = 0, lty  =2 ) +
  
  geom_linerange(data = results,
                 aes(x = n, y = ATT, ymin = ci_90_low, ymax = ci_90_high), 
                 size = 0.75, 
                 color = 'gray80', 
                 position = position_dodge(width = 1/2)) +
  
  geom_point(position = position_dodge(width = 1/2), size = 2) + 
  
  
  labs(y = "ATT", 
       x = "Time") +
  theme_minimal() + 
  theme(axis.title.y=element_blank(), legend.position = "none") 


#ggsave('./graphs/china/results_cluster1.pdf', width = 7, height = 3)



plot(m1, type = 'ct') +
  theme_minimal() +
  theme(legend.position = 'bottom') +
  labs(title = "",
       x = 'Treatment Time')

#ggsave('./graphs/china/ct_cluster1.pdf', width = 7, height = 3)




```



```{r, fig.show='hold', out.width="80%", fig.cap="Cluster 1", fig.subcap=c('Pre-Treatment period', 'ATT with 90 percent CI', 'Counterfactual')}
# Save plots
knitr::include_graphics(c('./graphs/china/pre_treatment1.pdf',
                          './graphs/china/results_cluster1.pdf',
                          './graphs/china/ct_cluster1.pdf'))

```


## Cluster 2


```{r}
df <- data_china %>% drop_na(IdealPointDistance)

df <- df %>% drop_na(polity2_dist, gdp_cap_dist)

df <- df %>% group_by(ctry1) %>% mutate(n = n()) 


df_sub <- df %>% filter(cluster == 2)
table <- df_sub %>% select(ctry1) %>% distinct()
```


```{r}
knitr::kable(table)
```


```{r}
# Pre-treatment trends
ggplot(df_sub %>% filter(year %in% c(2004:2013)), aes(year, IdealPointDistance, color = ctry1)) +
  geom_line() +
  theme_minimal() +
  theme(legend.position = 'none') +
  
  labs(x = 'Year')

#ggsave('./graphs/china/pre_treatment2.pdf', width = 7, height = 3)

```


```{r, }
#### Reproduces Figure A.6 ####
m1 <- gsynth(IdealPointDistance ~ treated + ctry1_fariss + log(ctry1_gdp_cap) + ctry1_polity2, 
             data = df_sub, 
             index = c("ctry1", "year"),
             se = T,
             force = 'time',
            # CV = TRUE,
             r = c(0, 5),
             inference = "parametric", nboots = 1000,
             parallel = FALSE)

results <- as.tibble(m1$est.att) %>%
  mutate(n = row_number()) %>%
  mutate(ci_90_low = ATT - 1.645 * S.E., 
         ci_90_high = ATT + 1.645 * S.E., 
         n = n-10.5)

ggplot(results, aes(n, ATT)) +
  geom_hline(yintercept = 0, lty = 2, color = "gray60") +
  geom_vline(xintercept = 0, lty  =2 ) +
  
  geom_linerange(data = results,
                 aes(x = n, y = ATT, ymin = ci_90_low, ymax = ci_90_high), 
                 size = 0.75, 
                 color = 'gray80', 
                 position = position_dodge(width = 1/2)) +
  
  geom_point(position = position_dodge(width = 1/2), size = 2) + 
  labs(y = "ATT", 
       x = "Time") +
  theme_minimal() + 
  theme(axis.title.y=element_blank(), legend.position = "none") 


#ggsave('./graphs/china/results_cluster2.pdf', width = 7, height = 3)

plot(m1, type = 'ct') +
  theme_minimal() +
  theme(legend.position = 'bottom') +
  labs(title = "",
       x = 'Treatment Time')

#ggsave('./graphs/china/ct_cluster2.pdf', width = 7, height = 3)


```

```{r , fig.show='hold', out.width="80%",fig.cap = "Cluster 6", fig.subcap=c('Pre-Treatment period', 'ATT with 90 percent CI', 'Counterfactual')}
# Save plots
knitr::include_graphics(c('./graphs/china/pre_treatment2.pdf',
                          './graphs/china/results_cluster2.pdf',
                          './graphs/china/ct_cluster2.pdf'))
```


## Cluster 3


```{r}
df <- data_china %>% drop_na(IdealPointDistance)

df <- df %>% drop_na(polity2_dist, gdp_cap_dist)

df <- df %>% group_by(ctry1) %>% mutate(n = n()) 


df_sub <- df %>% filter(cluster == 4)
table <- df_sub %>% select(ctry1) %>% distinct()
```


```{r}
knitr::kable(table)
```



```{r}
# Pre-treatment trends
ggplot(df_sub %>% filter(year %in% c(2004:2013)), aes(year, IdealPointDistance, color = ctry1)) +
  geom_line() +
  theme_minimal() +
  theme(legend.position = 'none') +
  
  labs(x = 'Year')

#ggsave('./graphs/china/pre_treatment4.pdf', width = 7, height = 3)

```


```{r, }
#### Reproduces Figure A.7 ####
m1 <- gsynth(IdealPointDistance ~ treated + ctry1_fariss + log(ctry1_gdp_cap) + ctry1_polity2, 
             data = df_sub, 
             index = c("ctry1", "year"),
             se = T,
             force = 'time',
            # CV = TRUE,
             r = c(0, 5),
             inference = "parametric", nboots = 1000,
             parallel = FALSE)

results <- as.tibble(m1$est.att) %>%
  mutate(n = row_number()) %>%
  mutate(ci_90_low = ATT - 1.645 * S.E., 
         ci_90_high = ATT + 1.645 * S.E., 
         n = n-10.5)


ggplot(results, aes(n, ATT)) +
  geom_hline(yintercept = 0, lty = 2, color = "gray60") +
  geom_vline(xintercept = 0, lty  =2 ) +
  
  geom_linerange(data = results,
                 aes(x = n, y = ATT, ymin = ci_90_low, ymax = ci_90_high), 
                 size = 0.75, 
                 color = 'gray80', 
                 position = position_dodge(width = 1/2)) +
  
  geom_point(position = position_dodge(width = 1/2), size = 2) + 
  
  
  labs(y = "ATT", 
       x = "Time") +
  theme_minimal() + 
  theme(axis.title.y=element_blank(), legend.position = "none") 


#ggsave('./graphs/china/results_cluster4.pdf', width = 7, height = 3)

plot(m1, type = 'ct') +
  theme_minimal() +
  theme(legend.position = 'bottom') +
  labs(title = "",
       x = 'Treatment Time')

#ggsave('./graphs/china/ct_cluster4.pdf', width = 7, height = 3)

```

```{r , fig.show='hold', out.width="80%", fig.cap = "Cluster 4", fig.subcap=c('Pre-Treatment period', 'ATT with 90 percent CI', 'Counterfactual')}
knitr::include_graphics(c('./graphs/china/pre_treatment4.pdf',
                          './graphs/china/results_cluster4.pdf',
                          './graphs/china/ct_cluster4.pdf'))
```



## Cluster 4


```{r}
df <- data_china %>% drop_na(IdealPointDistance)

df <- df %>% drop_na(polity2_dist, gdp_cap_dist)

df <- df %>% group_by(ctry1) %>% mutate(n = n()) 


df_sub <- df %>% filter(cluster == 5 & ctry1 != 'Canada')
table <- df_sub %>% select(ctry1) %>% distinct()
```

```{r}
knitr::kable(table)
```

```{r}
#### Reproduces Figure 5a in Manuscript ####
ggplot(df_sub %>% filter(year %in% c(2004:2013)), aes(year, IdealPointDistance, color = ctry1)) +
  geom_line() +
  theme_minimal() +
  theme(legend.position = 'none') +
  
  labs(x = 'Year')


#ggsave('./graphs/china/pre_treatment5.pdf', width = 7, height = 3)

```


```{r, }
#### Reproduces Figure 5b & 5c in Manuscript ####
m1 <- gsynth(IdealPointDistance ~ treated + ctry1_fariss + log(ctry1_gdp_cap) + ctry1_polity2, 
             data = df_sub, 
             index = c("ctry1", "year"),
             se = T,
             force = 'time',
            # CV = TRUE,
             r = c(0, 5),
             inference = "parametric", nboots = 1000,
             parallel = FALSE)

results <- as.tibble(m1$est.att) %>%
  mutate(n = row_number()) %>%
  mutate(ci_90_low = ATT - 1.645 * S.E., 
         ci_90_high = ATT + 1.645 * S.E., 
         n = n-10.5)

ggplot(results, aes(n, ATT)) +
  geom_hline(yintercept = 0, lty = 2, color = "gray60") +
  geom_vline(xintercept = 0, lty  =2 ) +
  
  geom_linerange(data = results,
                 aes(x = n, y = ATT, ymin = ci_90_low, ymax = ci_90_high), 
                 size = 0.75, 
                 color = 'gray80', 
                 position = position_dodge(width = 1/2)) +
  
  geom_point(position = position_dodge(width = 1/2), size = 2) + 
  
  
  labs(y = "ATT", 
       x = "Time") +
  theme_minimal() + 
  theme(axis.title.y=element_blank(), legend.position = "none") 


#ggsave('./graphs/china/results_cluster5.pdf', width = 7, height = 3)

plot(m1, type = 'ct') +
  theme_minimal() +
  theme(legend.position = 'bottom') +
  labs(title = "",
       x = 'Treatment Time')

#ggsave('./graphs/china/ct_cluster5.pdf', width = 7, height = 3)
```

```{r , fig.show='hold', out.width="80%",fig.cap = "Cluster 5", fig.subcap=c('Pre-Treatment period', 'ATT with 90 percent CI', 'Counterfactual')}
# Save plots
knitr::include_graphics(c('./graphs/china/pre_treatment5.pdf',
                          './graphs/china/results_cluster5.pdf',
                          './graphs/china/ct_cluster5.pdf'))
```


## Cluster 5


```{r}
df <- data_china %>% drop_na(IdealPointDistance)

df <- df %>% drop_na(polity2_dist, gdp_cap_dist)

df <- df %>% group_by(ctry1) %>% mutate(n = n()) 


df_sub <- df %>% filter(cluster == 6)
table <- df_sub %>% select(ctry1) %>% distinct()
```


```{r}
knitr::kable(table)
```


```{r}
# Pre-treatment trends
ggplot(df_sub %>% filter(year %in% c(2004:2013)), aes(year, IdealPointDistance, color = ctry1)) +
  geom_line() +
  theme_minimal() +
  theme(legend.position = 'none') +
  
  labs(x = 'Year')

#ggsave('./graphs/china/pre_treatment6.pdf', width = 7, height = 3)


```


```{r, }
#### Reproduces Figure A.8 ####
m1 <- gsynth(IdealPointDistance ~ treated + ctry1_fariss + log(ctry1_gdp_cap) + ctry1_polity2, 
             data = df_sub, 
             index = c("ctry1", "year"),
             se = T,
             force = 'time',
            # CV = TRUE,
             r = c(0, 5),
             inference = "parametric", nboots = 1000,
             parallel = FALSE)

results <- as.tibble(m1$est.att) %>%
  mutate(n = row_number()) %>%
  mutate(ci_90_low = ATT - 1.645 * S.E., 
         ci_90_high = ATT + 1.645 * S.E., 
         n = n-10.5)

ggplot(results, aes(n, ATT)) +
  geom_hline(yintercept = 0, lty = 2, color = "gray60") +
  geom_vline(xintercept = 0, lty  =2 ) +
  
  geom_linerange(data = results,
                 aes(x = n, y = ATT, ymin = ci_90_low, ymax = ci_90_high), 
                 size = 0.75, 
                 color = 'gray80', 
                 position = position_dodge(width = 1/2)) +
  
  geom_point(position = position_dodge(width = 1/2), size = 2) + 
  labs(y = "ATT", 
       x = "Time") +
  theme_minimal() + 
  theme(axis.title.y=element_blank(), legend.position = "none") 

#ggsave('./graphs/china/results_cluster6.pdf', width = 7, height = 3)

plot(m1, type = 'ct') +
  theme_minimal() +
  theme(legend.position = 'bottom') +
  labs(title = "",
       x = 'Treatment Time')

#ggsave('./graphs/china/ct_cluster6.pdf', width = 7, height = 3)

```

```{r , fig.show='hold', out.width="80%", fig.cap = "Cluster 6", fig.subcap=c('Pre-Treatment period', 'ATT with 90 percent CI', 'Counterfactual')}
# Save plots
knitr::include_graphics(c('./graphs/china/pre_treatment6.pdf',
                          './graphs/china/results_cluster6.pdf',
                          './graphs/china/ct_cluster6.pdf'))
```


## Cluster 6


```{r}
df <- data_china %>% drop_na(IdealPointDistance)

df <- df %>% drop_na(polity2_dist, gdp_cap_dist)

df <- df %>% group_by(ctry1) %>% mutate(n = n()) 


df_sub <- df %>% filter(cluster == 7 & ctry1 != 'Canada')
table <- df_sub %>% select(ctry1) %>% distinct()
```


```{r}
knitr::kable(table)
```

```{r}
# Pre-treatment trends
ggplot(df_sub %>% filter(year %in% c(2004:2013)), aes(year, IdealPointDistance, color = ctry1)) +
  geom_line() +
  theme_minimal() +
  theme(legend.position = 'none') +
  
  labs(x = 'Year')

#ggsave('./graphs/china/pre_treatment7.pdf', width = 7, height = 3)

```



```{r, }
#### Reproduces Figure A.9 ####
m1 <- gsynth(IdealPointDistance ~ treated + ctry1_fariss + log(ctry1_gdp_cap) + ctry1_polity2, 
             data = df_sub, 
             index = c("ctry1", "year"),
             se = T,
             force = 'time',
            # CV = TRUE,
             r = c(0, 5),
             inference = "parametric", nboots = 1000,
             parallel = FALSE)


results <- as.tibble(m1$est.att) %>%
  mutate(n = row_number()) %>%
  mutate(ci_90_low = ATT - 1.645 * S.E., 
         ci_90_high = ATT + 1.645 * S.E., 
         n = n-10.5)

ggplot(results, aes(n, ATT)) +
  geom_hline(yintercept = 0, lty = 2, color = "gray60") +
  geom_vline(xintercept = 0, lty  =2 ) +
  
  geom_linerange(data = results,
                 aes(x = n, y = ATT, ymin = ci_90_low, ymax = ci_90_high), 
                 size = 0.75, 
                 color = 'gray80', 
                 position = position_dodge(width = 1/2)) +
  
  geom_point(position = position_dodge(width = 1/2), size = 2) + 
  
  
  labs(y = "ATT", 
       x = "Time") +
  theme_minimal() + 
  theme(axis.title.y=element_blank(), legend.position = "none") 

#ggsave('./graphs/china/results_cluster7.pdf', width = 7, height = 3)

plot(m1, type = 'ct') +
  theme_minimal() +
  theme(legend.position = 'bottom') +
  labs(title = "",
       x = 'Treatment Time')

#ggsave('./graphs/china/ct_cluster7.pdf', width = 7, height = 3)

```

```{r , fig.show='hold', out.width="80%", fig.cap = "Cluster 7", fig.subcap=c('Pre-Treatment period', 'ATT with 90 percent CI', 'Counterfactual')}
# Save plots
knitr::include_graphics(c('./graphs/china/pre_treatment7.pdf',
                          './graphs/china/results_cluster7.pdf',
                          './graphs/china/ct_cluster7.pdf'))
```


# Analysis of USA


```{r}
data_usa <- read_csv2("./replication_files/data/data/final_data/usa_data_with_cluster.csv") %>% filter(year > 2003)

data_usa %>% select(ctry1, cluster) %>% arrange(cluster) %>% distinct()
```

## Cluster 1


```{r}
df <- data_usa %>% drop_na(IdealPointDistance)

df <- df %>% drop_na(polity2_dist, gdp_cap_dist)

df <- df %>% group_by(ctry1) %>% mutate(n = n()) 


df_sub <- df %>% filter(cluster == 1)
table <- df_sub %>% select(ctry1) %>% distinct()
```

```{r}
knitr::kable(table)
```

```{r}
ggplot(df_sub %>% filter(year %in% c(2004:2013)), aes(year, IdealPointDistance, color = ctry1)) +
  geom_line() +
  theme_minimal() +
  theme(legend.position = 'none') +
  
  labs(x = 'Year')

#ggsave('./graphs/usa/pre_treatment1.pdf', width = 7, height = 3)

```


```{r, }
m1 <- gsynth(IdealPointDistance ~ treated + ctry1_fariss + log(ctry1_gdp_cap) + ctry1_polity2, 
             data = df_sub, 
             index = c("ctry1", "year"),
             se = T,
             force = 'time',
             # CV = TRUE,
             r = c(0, 5),
             inference = "parametric", nboots = 1000,
             parallel = FALSE)

results <- as.tibble(m1$est.att) %>%
  mutate(n = row_number()) %>%
  mutate(ci_90_low = ATT - 1.645 * S.E., 
         ci_90_high = ATT + 1.645 * S.E., 
         n = n-10.5)

ggplot(results, aes(n, ATT)) +
  geom_hline(yintercept = 0, lty = 2, color = "gray60") +
  geom_vline(xintercept = 0, lty  =2 ) +
  
  geom_linerange(data = results,
                 aes(x = n, y = ATT, ymin = ci_90_low, ymax = ci_90_high), 
                 size = 0.75, 
                 color = 'gray80', 
                 position = position_dodge(width = 1/2)) +
  
  geom_point(position = position_dodge(width = 1/2), size = 2) + 
  
  
  labs(y = "ATT", 
       x = "Time") +
  theme_minimal() + 
  theme(axis.title.y=element_blank(), legend.position = "none") 

#ggsave('./graphs/usa/results_cluster1.pdf', width = 7, height = 3)

plot(m1, type = 'ct') +
  theme_minimal() +
  theme(legend.position = 'bottom') +
  labs(title = "",
       x = 'Treatment Time')

#ggsave('./graphs/usa/ct_cluster1.pdf', width = 7, height = 3)


```



```{r, fig.show='hold', out.width="80%", fig.cap="Cluster 1", fig.subcap=c('Pre-Treatment period', 'ATT with 90 percent CI', 'Counterfactual')}
knitr::include_graphics(c('./graphs/usa/pre_treatment1.pdf',
                          './graphs/usa/results_cluster1.pdf',
                          './graphs/usa/ct_cluster1.pdf'))

```


## Cluster 2


```{r}
df <- data_usa %>% drop_na(IdealPointDistance)

df <- df %>% drop_na(polity2_dist, gdp_cap_dist)

df <- df %>% group_by(ctry1) %>% mutate(n = n()) 


df_sub <- df %>% filter(cluster == 2)
table <- df_sub %>% select(ctry1) %>% distinct()
```


```{r}
knitr::kable(table)
```

```{r}
ggplot(df_sub %>% filter(year %in% c(2004:2013)), aes(year, IdealPointDistance, color = ctry1)) +
  geom_line() +
  theme_minimal() +
  theme(legend.position = 'none') +
  
  labs(x = 'Year')

#ggsave('./graphs/usa/pre_treatment2.pdf', width = 7, height = 3)

```

```{r, }
m1 <- gsynth(IdealPointDistance ~ treated + ctry1_fariss + log(ctry1_gdp_cap) + ctry1_polity2, 
             data = df_sub, 
             index = c("ctry1", "year"),
             se = T,
             force = 'time',
             # CV = TRUE,
             r = c(0, 5),
             inference = "parametric", nboots = 1000,
             parallel = FALSE)

results <- as.tibble(m1$est.att) %>%
  mutate(n = row_number()) %>%
  mutate(ci_90_low = ATT - 1.645 * S.E., 
         ci_90_high = ATT + 1.645 * S.E., 
         n = n-10.5)

ggplot(results, aes(n, ATT)) +
  geom_hline(yintercept = 0, lty = 2, color = "gray60") +
  geom_vline(xintercept = 0, lty  =2 ) +
  
  geom_linerange(data = results,
                 aes(x = n, y = ATT, ymin = ci_90_low, ymax = ci_90_high), 
                 size = 0.75, 
                 color = 'gray80', 
                 position = position_dodge(width = 1/2)) +
  
  geom_point(position = position_dodge(width = 1/2), size = 2) + 
  labs(y = "ATT", 
       x = "Time") +
  theme_minimal() + 
  theme(axis.title.y=element_blank(), legend.position = "none") 


#ggsave('./graphs/usa/results_cluster2.pdf', width = 7, height = 3)

plot(m1, type = 'ct') +
  theme_minimal() +
  theme(legend.position = 'bottom') +
  labs(title = "",
       x = 'Treatment Time')

#ggsave('./graphs/usa/ct_cluster2.pdf', width = 7, height = 3)

```

```{r , fig.show='hold', out.width="80%",fig.cap = "Cluster 6", fig.subcap=c('Pre-Treatment period', 'ATT with 90 percent CI', 'Counterfactual')}
knitr::include_graphics(c('./graphs/usa/pre_treatment2.pdf',
                          './graphs/usa/results_cluster2.pdf',
                          './graphs/usa/ct_cluster2.pdf'))
```

## Cluster 3


```{r}
df <- data_usa %>% drop_na(IdealPointDistance)

df <- df %>% drop_na(polity2_dist, gdp_cap_dist)

df <- df %>% group_by(ctry1) %>% mutate(n = n()) 


df_sub <- df %>% filter(cluster == 4)
table <- df_sub %>% select(ctry1) %>% distinct()
```

```{r}
knitr::kable(table)
```

```{r}
ggplot(df_sub %>% filter(year %in% c(2004:2013)), aes(year, IdealPointDistance, color = ctry1)) +
  geom_line() +
  theme_minimal() +
  theme(legend.position = 'none') +
  
  labs(x = 'Year')

#ggsave('./graphs/usa/pre_treatment4.pdf', width = 7, height = 3)

```


```{r, }
m1 <- gsynth(IdealPointDistance ~ treated + ctry1_fariss + log(ctry1_gdp_cap) + ctry1_polity2, 
             data = df_sub, 
             index = c("ctry1", "year"),
             se = T,
             force = 'time',
             # CV = TRUE,
             r = c(0, 5),
             inference = "parametric", nboots = 1000,
             parallel = FALSE)

results <- as.tibble(m1$est.att) %>%
  mutate(n = row_number()) %>%
  mutate(ci_90_low = ATT - 1.645 * S.E., 
         ci_90_high = ATT + 1.645 * S.E., 
         n = n-10.5)

ggplot(results, aes(n, ATT)) +
  geom_hline(yintercept = 0, lty = 2, color = "gray60") +
  geom_vline(xintercept = 0, lty  =2 ) +
  geom_linerange(data = results,
                 aes(x = n, y = ATT, ymin = ci_90_low, ymax = ci_90_high), 
                 size = 0.75, 
                 color = 'gray80', 
                 position = position_dodge(width = 1/2)) +
  
  geom_point(position = position_dodge(width = 1/2), size = 2) + 
  labs(y = "ATT", 
       x = "Time") +
  theme_minimal() + 
  theme(axis.title.y=element_blank(), legend.position = "none") 


#ggsave('./graphs/usa/results_cluster4.pdf', width = 7, height = 3)

plot(m1, type = 'ct') +
  theme_minimal() +
  theme(legend.position = 'bottom') +
  labs(title = "",
       x = 'Treatment Time')

#ggsave('./graphs/usa/ct_cluster4.pdf', width = 7, height = 3)



```

```{r , fig.show='hold', out.width="80%", fig.cap = "Cluster 6", fig.subcap=c('Pre-Treatment period', 'ATT with 90 percent CI', 'Counterfactual')}
knitr::include_graphics(c('./graphs/usa/pre_treatment4.pdf',
                          './graphs/usa/results_cluster4.pdf',
                          './graphs/usa/ct_cluster4.pdf'))
```

## Cluster 4


```{r}
df <- data_usa %>% drop_na(IdealPointDistance)

df <- df %>% drop_na(polity2_dist, gdp_cap_dist)

df <- df %>% group_by(ctry1) %>% mutate(n = n()) 


df_sub <- df %>% filter(cluster == 5)
table <- df_sub %>% select(ctry1) %>% distinct()
```


```{r}
knitr::kable(table)
```

```{r}
ggplot(df_sub %>% filter(year %in% c(2004:2013)), aes(year, IdealPointDistance, color = ctry1)) +
  geom_line() +
  theme_minimal() +
  theme(legend.position = 'none') +
  
  labs(x = 'Year')


#ggsave('./graphs/usa/pre_treatment3.pdf', width = 7, height = 3)

```


```{r, }
m1 <- gsynth(IdealPointDistance ~ treated + ctry1_fariss + log(ctry1_gdp_cap) + ctry1_polity2, 
             data = df_sub, 
             index = c("ctry1", "year"),
             se = T,
             force = 'time',
             # CV = TRUE,
             r = c(0, 5),
             inference = "parametric", nboots = 1000,
             parallel = FALSE)


results <- as.tibble(m1$est.att) %>%
  mutate(n = row_number()) %>%
  mutate(ci_90_low = ATT - 1.645 * S.E., 
         ci_90_high = ATT + 1.645 * S.E., 
         n = n-10.5)

ggplot(results, aes(n, ATT)) +
  geom_hline(yintercept = 0, lty = 2, color = "gray60") +
  geom_vline(xintercept = 0, lty  =2 ) +
  
  geom_linerange(data = results,
                 aes(x = n, y = ATT, ymin = ci_90_low, ymax = ci_90_high), 
                 size = 0.75, 
                 color = 'gray80', 
                 position = position_dodge(width = 1/2)) +
  
  geom_point(position = position_dodge(width = 1/2), size = 2) + 
  
  
  labs(y = "ATT", 
       x = "Time") +
  theme_minimal() + 
  theme(axis.title.y=element_blank(), legend.position = "none") 


#ggsave('./graphs/usa/results_cluster3.pdf', width = 7, height = 3)

plot(m1, type = 'ct') +
  theme_minimal() +
  theme(legend.position = 'bottom') +
  labs(title = "",
       x = 'Treatment Time')

#ggsave('./graphs/usa/ct_cluster3.pdf', width = 7, height = 3)


```

```{r , fig.show='hold', out.width="80%",fig.cap = "Cluster 6", fig.subcap=c('Pre-Treatment period', 'ATT with 90 percent CI', 'Counterfactual')}
knitr::include_graphics(c('./graphs/usa/pre_treatment3.pdf',
                          './graphs/usa/results_cluster3.pdf',
                          './graphs/usa/ct_cluster3.pdf'))
```

## Cluster 5


```{r}
df <- data_usa %>% drop_na(IdealPointDistance)

df <- df %>% drop_na(polity2_dist, gdp_cap_dist)

df <- df %>% group_by(ctry1) %>% mutate(n = n()) 


df_sub <- df %>% filter(cluster == 6 & ctry1 != 'Canada')
table <- df_sub %>% select(ctry1) %>% distinct()
```


```{r}
knitr::kable(table)
```


```{r}
ggplot(df_sub %>% filter(year %in% c(2004:2013)), aes(year, IdealPointDistance, color = ctry1)) +
  geom_line() +
  theme_minimal() +
  theme(legend.position = 'none') +
  
  labs(x = 'Year')

#ggsave('./graphs/usa/pre_treatment6.pdf', width = 7, height = 3)

```



```{r, }
m1 <- gsynth(IdealPointDistance ~ treated + ctry1_fariss + log(ctry1_gdp_cap) + ctry1_polity2, 
             data = df_sub, 
             index = c("ctry1", "year"),
             se = T,
             force = 'time',
             # CV = TRUE,
             r = c(0, 5),
             inference = "parametric", nboots = 1000,
             parallel = FALSE)


results <- as.tibble(m1$est.att) %>%
  mutate(n = row_number()) %>%
  mutate(ci_90_low = ATT - 1.645 * S.E., 
         ci_90_high = ATT + 1.645 * S.E., 
         n = n-10.5)

ggplot(results, aes(n, ATT)) +
  geom_hline(yintercept = 0, lty = 2, color = "gray60") +
  geom_vline(xintercept = 0, lty  =2 ) +
  
  geom_linerange(data = results,
                 aes(x = n, y = ATT, ymin = ci_90_low, ymax = ci_90_high), 
                 size = 0.75, 
                 color = 'gray80', 
                 position = position_dodge(width = 1/2)) +
  
  geom_point(position = position_dodge(width = 1/2), size = 2) + 
  labs(y = "ATT", 
       x = "Time") +
  theme_minimal() + 
  theme(axis.title.y=element_blank(), legend.position = "none") 


#ggsave('./graphs/usa/results_cluster6.pdf', width = 7, height = 3)

plot(m1, type = 'ct') +
  theme_minimal() +
  theme(legend.position = 'bottom') +
  labs(title = "",
       x = 'Treatment Time')

#ggsave('./graphs/usa/ct_cluster6.pdf', width = 7, height = 3)



```



```{r , fig.show='hold', out.width="80%", fig.cap = "Cluster 6", fig.subcap=c('Pre-Treatment period', 'ATT with 90 percent CI', 'Counterfactual')}
knitr::include_graphics(c('./graphs/usa/pre_treatment6.pdf',
                          './graphs/usa/results_cluster6.pdf',
                          './graphs/usa/ct_cluster6.pdf'))
```

## Cluster 6


```{r}
df <- data_usa %>% drop_na(IdealPointDistance)

df <- df %>% drop_na(polity2_dist, gdp_cap_dist)

df <- df %>% group_by(ctry1) %>% mutate(n = n()) 


df_sub <- df %>% filter(cluster == 7)
table <- df_sub %>% select(ctry1) %>% distinct()
```


```{r}
knitr::kable(table)
```


```{r}
ggplot(df_sub %>% filter(year %in% c(2004:2013)), aes(year, IdealPointDistance, color = ctry1)) +
  geom_line() +
  theme_minimal() +
  theme(legend.position = 'none') +
  
  labs(x = 'Year')

#ggsave('./graphs/usa/pre_treatment7.pdf', width = 7, height = 3)


```

```{r, }
m1 <- gsynth(IdealPointDistance ~ treated + ctry1_fariss + log(ctry1_gdp_cap) + ctry1_polity2, 
             data = df_sub, 
             index = c("ctry1", "year"),
             se = T,
             force = 'time',
             # CV = TRUE,
             r = c(0, 5),
             inference = "parametric", nboots = 1000,
             parallel = FALSE)

results <- as.tibble(m1$est.att) %>%
  mutate(n = row_number()) %>%
  mutate(ci_90_low = ATT - 1.645 * S.E., 
         ci_90_high = ATT + 1.645 * S.E., 
         n = n-10.5)

ggplot(results, aes(n, ATT)) +
  geom_hline(yintercept = 0, lty = 2, color = "gray60") +
  geom_vline(xintercept = 0, lty  =2 ) +
  
  geom_linerange(data = results,
                 aes(x = n, y = ATT, ymin = ci_90_low, ymax = ci_90_high), 
                 size = 0.75, 
                 color = 'gray80', 
                 position = position_dodge(width = 1/2)) +
  
  geom_point(position = position_dodge(width = 1/2), size = 2) + 
  labs(y = "ATT", 
       x = "Time") +
  theme_minimal() + 
  theme(axis.title.y=element_blank(), legend.position = "none") 


#ggsave('./graphs/usa/results_cluster7.pdf', width = 7, height = 3)


plot(m1, type = 'ct') +
  theme_minimal() +
  theme(legend.position = 'bottom') +
  labs(title = "",
       x = 'Treatment Time')

#ggsave('./graphs/usa/ct_cluster7.pdf', width = 7, height = 3)

```



```{r , fig.show='hold', out.width="80%", fig.cap = "Cluster 7", fig.subcap=c('Pre-Treatment period', 'ATT with 90 percent CI', 'Counterfactual')}
knitr::include_graphics(c('./graphs/usa/pre_treatment7.pdf',
                          './graphs/usa/results_cluster7.pdf',
                          './graphs/usa/ct_cluster7.pdf'))
```


# Europe analysis


```{r}
data_china <- read_csv2("./replication_files/data/data/final_data/china_data_with_cluster.csv") %>% filter(year > 2003)

```

```{r}
# Get data on continents
countries <- gisco_get_countries(
  year = "2016",
  epsg = "4326",
  resolution = "10"
)

continents <- gisco_countrycode


countries <- countries %>% left_join(continents, by = "ISO3_CODE")

countries <- subset(countries, CNTR_NAME != "Antarctica")
countries_small <- select(countries, CNTR_ID, continent, eu) %>% 
  rename(Iso = CNTR_ID)

countries_small$geometry <- NULL

countries_small$countryname <- countrycode::countrycode(countries_small$Iso, "iso2c", "country.name")

europe <- countries_small %>% filter(eu == 'TRUE')

```


## China - Europe


```{r}
data_china <- data_china %>% filter(ctry1 %in% europe$countryname)

df_sub <- data_china %>% drop_na(IdealPointDistance)

df_sub <- df_sub %>% drop_na(polity2_dist, gdp_cap_dist)

```

```{r}
#### Reproduces Figure 6a ####
ggplot(df_sub %>% filter(year %in% c(2004:2013)), aes(year, IdealPointDistance, color = ctry1)) +
  geom_line() +
  theme_minimal() +
  theme(legend.position = 'none') +
  
  labs(x = 'Year')


#ggsave('./graphs/china/pre_europe.pdf', width = 7, height = 3)


```


```{r, }
#### Reproduces Figure 6b/ 6c in the manuscript ####
m1 <- gsynth(IdealPointDistance ~ treated + ctry1_fariss + log(ctry1_gdp_cap) + ctry1_polity2 + ctry1_polity2, 
             data = df_sub, 
             index = c("ctry1", "year"),
             se = T,
             force = 'time',
            # CV = TRUE,
             r = c(0, 5),
             inference = "parametric", nboots = 1000,
             parallel = FALSE)

results <- as.tibble(m1$est.att) %>%
  mutate(n = row_number()) %>%
  mutate(ci_90_low = ATT - 1.645 * S.E., 
         ci_90_high = ATT + 1.645 * S.E., 
         n = n-10.5)

ggplot(results, aes(n, ATT)) +
  geom_hline(yintercept = 0, lty = 2, color = "gray60") +
  geom_vline(xintercept = 0, lty  =2 ) +
  
  geom_linerange(data = results,
                 aes(x = n, y = ATT, ymin = ci_90_low, ymax = ci_90_high), 
                 size = 0.75, 
                 color = 'gray80', 
                 position = position_dodge(width = 1/2)) +
  
  geom_point(position = position_dodge(width = 1/2), size = 2) + 
  labs(y = "ATT", 
       x = "Time") +
  theme_minimal() + 
  theme(axis.title.y=element_blank(), legend.position = "none") 


#ggsave('./graphs/china/results_europe.pdf', width = 7, height = 3)

plot(m1, type = 'ct') +
  theme_minimal() +
  theme(legend.position = 'bottom') +
  labs(title = "",
       x = 'Treatment Time')

#ggsave('./graphs/china/ct_europe.pdf', width = 7, height = 3)

```

```{r , fig.show='hold', out.width="80%", fig.cap = "Analysis EU Members and Cina", fig.subcap=c('Pre-Treatment period', 'ATT with 90 percent CI', 'Counterfactual')}
knitr::include_graphics(c('./graphs/china/pre_europe.pdf',
                          './graphs/china/results_europe.pdf',
                          './graphs/china/ct_europe.pdf'))
```


## USA - Europe

```{r}
data_usa <- data_usa %>% filter(ctry1 %in% europe$countryname)

df_sub <- data_usa %>% drop_na(IdealPointDistance)

df_sub <- df_sub %>% drop_na(polity2_dist, gdp_cap_dist)


```


```{r}
#### Reproduces Figure 7a ####
ggplot(df_sub %>% filter(year %in% c(2004:2013)), aes(year, IdealPointDistance, color = ctry1)) +
  geom_line() +
  theme_minimal() +
  theme(legend.position = 'none') +
  
  labs(x = 'Year')

#ggsave('./graphs/usa/pre_europe.pdf', width = 7, height = 3)

```


```{r, }
#### Reproduces Figures 7b & 7c in the manuscript ####
m1 <- gsynth(IdealPointDistance ~ treated + ctry1_fariss + log(ctry1_gdp_cap) + ctry1_polity2 + ctry1_polity2, 
             data = df_sub, 
             index = c("ctry1", "year"),
             se = T,
             force = 'time',
            # CV = TRUE,
             r = c(0, 5),
             inference = "parametric", nboots = 1000,
             parallel = FALSE)

results <- as.tibble(m1$est.att) %>%
  mutate(n = row_number()) %>%
  mutate(ci_90_low = ATT - 1.645 * S.E., 
         ci_90_high = ATT + 1.645 * S.E., 
         n = n-10.5)

ggplot(results, aes(n, ATT)) +
  geom_hline(yintercept = 0, lty = 2, color = "gray60") +
  geom_vline(xintercept = 0, lty  =2 ) +
  geom_linerange(data = results,
                 aes(x = n, y = ATT, ymin = ci_90_low, ymax = ci_90_high), 
                 size = 0.75, 
                 color = 'gray80', 
                 position = position_dodge(width = 1/2)) +
  
  geom_point(position = position_dodge(width = 1/2), size = 2) + 
  labs(y = "ATT", 
       x = "Time") +
  theme_minimal() + 
  theme(axis.title.y=element_blank(), legend.position = "none") 

#ggsave('./graphs/usa/results_europe.pdf', width = 7, height = 3)

plot(m1, type = 'ct') +
  theme_minimal() +
  theme(legend.position = 'bottom') +
  labs(title = "",
       x = 'Treatment Time')

#ggsave('./graphs/usa/ct_europe.pdf', width = 7, height = 3)

```


```{r , fig.show='hold', out.width="80%", fig.cap = "Analysis EU Members and the USA", fig.subcap=c('Pre-Treatment period', 'ATT with 90 percent CI', 'Counterfactual')}
knitr::include_graphics(c('./graphs/usa/pre_europe.pdf',
                          './graphs/usa/results_europe.pdf',
                          './graphs/usa/ct_europe.pdf'))
```

# Descriptives

```{r sign-data, }

# Reading data
sign_data <- read_xlsx("./replication_files/data/data/raw_data/BRI_Dates_byCountry.xlsx",2)

# Add countrycodes 
sign_data$cowc <- countrycode(sign_data$Country, "country.name", "cowc") 

# Check cases where it did not work
cowc_fails <- filter(sign_data, is.na(cowc))
unique(cowc_fails$Country) # only island states have no available ccodes

# Create panel dataset
data <- sign_data %>% 
  rowwise() %>% 
  mutate(year = list(2013:2022)) %>% 
  unnest(cols = c(year)) 

# Retrieve ratification year
data$date_signed <- as.Date(data$Date_MoU_Signed, "%d.%m.%Y")
data$year_signed <- format(data$date_signed, "%Y")
data$year_signed <- as.numeric(data$year_signed)

# Create indicator of the BRI has been signed in a given year
data$BRI_signed <- ifelse(data$year >= data$year_signed, 1, 0)
data$BRI_signed <- ifelse(is.na(data$BRI_signed), 0, data$BRI_signed)


```

```{r}
# Add Fariss HR scores
fariss <- read_delim(file="./replication_files/data/data/raw_data/HumanRightsProtectionScores_v4.01.csv", delim=",")

# Prepare data
fariss$cowc <- countrycode(fariss$COW, "cown", "cowc")
fariss$COW[is.na(fariss$cowc)] # German Federal Republic
fariss$YEAR[is.na(fariss$cowc)] # German Federal Republic until 1990
fariss <- subset(fariss, !is.na(cowc))
fariss$year <- fariss$YEAR
fariss2 <- fariss %>% dplyr::select(cowc, year, theta_mean, theta_sd, CIRI)
fariss2 <- filter(fariss2, year > 2012)
# Check and remove duplicates
see <- duplicated(fariss2[c("cowc", "year")])
table(see)

# Create unique identifier variable
fariss2$cowc_year <- paste(fariss2$cowc, "_", fariss2$year)
fariss2 <- dplyr::select(fariss2, cowc_year, theta_mean)
data$cowc_year <- paste(data$cowc, "_", data$year)

# Left join
data <- left_join(data, fariss2, by = "cowc_year")


```


```{r}
# Add World Bank data of GDP per capita
gdpcap <- read.csv2("./replication_files/data/data/raw_data/gdpcap_2022.csv")
colnames(gdpcap) <- gsub("^X", "",  colnames(gdpcap))
gdpdat <- gdpcap %>% 
  gather(year, gdpcap, "1960":"2022")
gdpdat$year <- as.numeric(gdpdat$year)
gdpdat <- filter(gdpdat, year > 2012)
gdpdat$cowc <- countrycode(gdpdat$Country.Name, "country.name", "cowc")
gdpdat <- filter(gdpdat, !is.na(gdpdat$cowc))
gdpdat$cowc_year <- paste(gdpdat$cowc, "_", gdpdat$year)

# Select relevant variables
gdpdat <- dplyr::select(gdpdat, cowc_year, gdpcap)

# Check and remove duplicates
see <- gdpdat[duplicated(gdpdat$cowc_year), ]
gdpdat <- gdpdat[!duplicated(gdpdat$cowc_year), ]

# Left join to dataset
data <- left_join(data, gdpdat, by = "cowc_year")
data$gdpcap <- as.numeric(data$gdpcap)

```

```{r}

# Load V-Dem Version 13
vdem <- read_delim(file="./replication_files/data/data/raw_data/V-Dem-CY-Full+Others-v13.csv", delim=",")

# Variables
vdem2 <- vdem[,c("country_name", "country_id", "year"
                ,"v2x_polyarchy", "v2x_libdem"
                ,"v2x_accountability", "v2x_veracc", "v2x_diagacc", "v2x_horacc"
                 ,"v2cseeorgs","v2csreprss", "v2cscnsult", "v2x_cspart",
                 "v2csstruc_nr", "v2csprtcpt", "v2csgender"
                ,"v2csantimv", "v2csanmvch_nr", "v2mecenefm",
                "v2mecenefi", "v2meharjrn", "v2peedueq", 
                "v2peapsecon", "v2clacjust", "v2cldiscw", "v2x_freexp",
                "v2peasjgeo", "v2x_freexp_altinf", "v2cldiscm",
                "v2juncind", "v2juhcind", "v2x_corr", "e_polity2", "e_wbgi_gee",
                "e_cow_exports", "e_cow_imports", "e_miinterc")]

# Subset
vdem3 <- subset(vdem2, year>=2012)

# Add country code
vdem3$cowc <- countrycode(vdem3$country_name, "country.name", "cowc") 
vdem3$cowc[vdem3$country_name=="Serbia"] <- "YUG"
vdem3 <- subset(vdem3, !is.na(cowc))

# unique?
table(duplicated(vdem3[c("cowc", "year")]))

# Left join
vdem3$cowc_year <- paste(vdem3$cowc, "_", vdem3$year)
vdem3 <- dplyr::select(vdem3, -cowc, -year)
data <- left_join(data, vdem3, by = "cowc_year")

```

```{r}
# Load World Bank data of Population size
popsize <- read.csv2("./replication_files/data/data/raw_data/popsize.csv")
colnames(popsize) <- gsub("^X", "",  colnames(popsize))
popsize <- popsize %>% 
  gather(year, popsize, "1960":"2022")
popsize$year <- as.numeric(popsize$year)
popsize <- filter(popsize, year > 1978)
popsize$cowc <- countrycode(popsize$Country.Name, "country.name", "cowc")
popsize <- popsize %>%  filter(!is.na(cowc))
popsize$cowc_year <- paste(popsize$cowc, "_", popsize$year)
popsize <- filter(popsize, year > 2012)

# Select relevant variables
popsize <- dplyr::select(popsize, cowc_year, popsize)

# Check and remove duplicates
see <- popsize[duplicated(popsize$cowc_year), ]
popsize <- popsize[!duplicated(popsize$cowc_year), ]

# Left join to dataset
data <- left_join(data, popsize, by = "cowc_year")
data$popsize <- as.numeric(data$popsize)


```


```{r}
# Check correlations
cor(data$BRI_signed, data$v2x_libdem, use="complete.obs")
cor(data$BRI_signed, data$theta_mean, use="complete.obs")
cor(data$BRI_signed, data$gdpcap, use="complete.obs")
cor(data$BRI_signed, data$popsize, use="complete.obs")
cor(data$BRI_signed, data$v2csreprss, use="complete.obs")
cor(data$BRI_signed, data$v2x_freexp, use="complete.obs")
cor(data$BRI_signed, data$v2juhcind, use="complete.obs")
cor(data$BRI_signed, data$v2x_corr, use="complete.obs")
cor(data$BRI_signed, data$e_polity2, use="complete.obs")
cor(data$BRI_signed, data$e_wbgi_gee, use="complete.obs")
cor(data$BRI_signed, data$e_cow_exports, use="complete.obs")
cor(data$BRI_signed, data$e_cow_imports, use="complete.obs")

# Logistic regression model
m1 <- glm(BRI_signed ~ v2x_libdem + gdpcap + popsize + theta_mean + v2x_corr, family = binomial(link = logit), data = data)
summary(m1)

# Multicollinearity check
vif(m1)

# Logistic regression model with Year FEs
m1 <- glm(BRI_signed ~ v2x_libdem + gdpcap + popsize + theta_mean + v2x_corr + factor(year), family = binomial(link = logit), data = data)
summary(m1)

#### Negative binomial model: Standardized coefficient plot: 95% and 99% CI ####
numeric_df <- dplyr::select(data, v2x_libdem, gdpcap, popsize, theta_mean, v2x_corr)
# Z-standardization
standardized_df <- scale(numeric_df)
apply(standardized_df, 2, sd, na.rm = T)
# Add dependent variable
standardized_df <- cbind(standardized_df, data$BRI_signed)
standardized_df <- cbind(standardized_df, data$year)
standardized_df <- cbind(standardized_df, data$Country)
apply(standardized_df, 2, sd, na.rm = T)
# As data.frame
standardized_df <- as.data.frame(standardized_df)
# Rename variables
standardized_df <- rename(standardized_df, BRI_signed = V6)
standardized_df <- rename(standardized_df, year = V7)
standardized_df <- rename(standardized_df, country = V8)
standardized_df$BRI_signed <- as.numeric(standardized_df$BRI_signed)
standardized_df$v2x_libdem <- as.numeric(standardized_df$v2x_libdem)
standardized_df$gdpcap <- as.numeric(standardized_df$gdpcap)
standardized_df$popsize <- as.numeric(standardized_df$popsize)
standardized_df$theta_mean <- as.numeric(standardized_df$theta_mean)
standardized_df$v2x_corr <- as.numeric(standardized_df$v2x_corr)


# Re-run models with z-standardized dataset
m1 <- glm(BRI_signed ~ v2x_libdem + gdpcap + popsize + theta_mean + v2x_corr, family = binomial(link = logit), data = standardized_df)
summary(m1)
s <- coeftest(m1, vcov = vcovCL(m1, cluster = m1$country))
stargazer(m1, se = list(s[, "Std. Error"]), type = "text")

m2 <- glm(BRI_signed ~ v2x_libdem + gdpcap + popsize + theta_mean + v2x_corr + factor(year), family = binomial(link = logit), data = standardized_df)
summary(m2)
s <- coeftest(m2, vcov = vcovCL(m2, cluster = m2$country))
stargazer(m2, se = list(s[, "Std. Error"]), type = "text")

m3 <- glm(BRI_signed ~ v2x_libdem + gdpcap + popsize + theta_mean + v2x_corr + factor(year) + factor(country), family = binomial(link = logit), data = standardized_df)
summary(m3)
s <- coeftest(m3, vcov = vcovCL(m3, cluster = m3$country))
stargazer(m3, se = list(s[, "Std. Error"]), type = "text")

# Create coefficient plot
library(broom)
library(ggplot2)
results_m1 <- m1 %>% tidy()
results_m1 <- results_m1 %>% mutate(lower = estimate - 1.96*std.error, 
                                    upper = estimate + 1.96*std.error, 
                                    lower_99 = estimate -	2.576*std.error, 
                                    upper_99 = estimate +	2.576*std.error,
                                    Scenario = "No fixed effects")

results_m2 <- m2 %>% tidy()
results_m2 <- results_m2 %>% mutate(lower = estimate - 1.96*std.error, 
                                    upper = estimate + 1.96*std.error, 
                                    lower_99 = estimate -	2.576*std.error, 
                                    upper_99 = estimate +	2.576*std.error,
                                    Scenario = "Year fixed effects")

# results_m3 <- m3 %>% tidy()
# results_m3 <- results_m3 %>% mutate(lower = estimate - 1.96*std.error, 
#                                     upper = estimate + 1.96*std.error, 
#                                     lower_99 = estimate -	2.576*std.error, 
#                                     upper_99 = estimate +	2.576*std.error,
#                                    Scenario = "Country fixed effects")


results <- rbind(results_m1, results_m2)
results <- results %>% 
  filter(!term %in% c("factor(year)2014", 
                      "factor(year)2015", "factor(year)2016", "factor(year)2017", "factor(year)2018",
                      "factor(year)2019", "(Intercept)", "factor"))  %>%
  filter(!grepl("factor", term))

results$term[results$term == "gdpcap"] <- "GDP per capita"
results$term[results$term == "v2x_libdem"] <- "LibDem Index"
results$term[results$term == "popsize"] <- "Log Popsize"
results$term[results$term == "theta_mean"] <- "Fariss HR"
results$term[results$term == "v2x_corr"] <- "Pol. Corruption"

#### Reproduces Figure 2 in the manuscript ####
#pdf(file = "./graphs/coefPLOT99_95.pdf")
ggplot(results) +
  geom_linerange(aes(ymin = lower_99, ymax = upper_99,
                     y = estimate, x = term, shape = Scenario), 
                 position = position_dodge(width = 0.5), 
                 color = "gray40", size = 0.75) +
  geom_linerange(aes(ymin = lower, ymax = upper,
                     y = estimate, x = term, shape = Scenario), 
                 position = position_dodge(width = 0.5), 
                 color = "gray80", size = 1.25) +
  
  geom_point(aes(y = estimate, x = term, shape = Scenario), position = position_dodge(width = 0.5), size = 2) +
  geom_hline(yintercept = 0, lty = 2) +
  xlab("") +
  coord_flip() +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("DV: BRI membership")
#dev.off()



```

```{r}
# Merge country_level data with with China Ideal Points data 

# Load data
china_dist <- read.csv2("./replication_files/data/data/final_data/china_data.csv")

# Prepare for merging
china_dist$cowc <- countrycode(china_dist$ctry1, "country.name", "cowc")
china_dist$cowc_year <- paste(china_dist$cowc, "_", china_dist$year)
china_dist <- filter(china_dist, year > 2012)
china_dist <- dplyr::select(china_dist, cowc_year, IdealPointDistance)

# Left joint to dataset
data <- left_join(data, china_dist, by = "cowc_year")

```

```{r}
# Run Diff-in-diff model as robustness test
library(did)

# Create numeric ID variable
data <- data  %>%                                    
  group_by(Country) %>%
  dplyr::mutate(ID = cur_group_id())


# estimate group-time average treatment effects using att_gt method
example_attgt <- att_gt(yname = "IdealPointDistance",
                        tname = "year",
                        idname = "ID",
                        gname = "year_signed",
                        panel = T,
                        xformla = ~ v2x_libdem + gdpcap + theta_mean + v2x_corr,
                        data = data)
summary(example_attgt)

# plot the results
ggdid(example_attgt)

# Simple aggregation
agg.simple <- aggte(example_attgt, type = "simple")
summary(agg.simple)

# Dynamics effects
agg.es <- aggte(example_attgt, type = "dynamic")
summary(agg.es)

#### Reproduces Figure A.18 ####
#pdf(file = "diff-in-diff.pdf")
ggdid(agg.es) + ylab("Ideal Point Distances to China") + theme(plot.title = element_text(hjust = 0.5, color = "black")) + theme(axis.title.y = element_text(colour = "black"))
#dev.off()

# Group-specific effects
agg.gs <- aggte(example_attgt, type = "group")
summary(agg.gs)

#### Re-run analysis with subset of EU member states
data_sub <- data %>% filter(Country %in% c("Austria", "Belgium", "Bulgaria", "Croatia", "Czech Republic", "Denmark", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary", "Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg", "Malta", "Netherlands", "Poland", "Portugal", "Canada",
                                     "Romania", "Slovakia", "Slovenia", "Spain", "Sweden", "United Kingdom"))


# estimate group-time average treatment effects using att_gt method
example_attgt <- att_gt(yname = "IdealPointDistance",
                        tname = "year",
                        idname = "ID",
                        gname = "year_signed",
                        panel = T,
                        control_group = "notyettreated",
                        xformla = ~ v2x_libdem + gdpcap + theta_mean + v2x_corr,
                        data = data_sub)
summary(example_attgt)

# Simple aggregation
agg.simple <- aggte(example_attgt, type = "simple", na.rm = T)
summary(agg.simple)

# Dynamics effects
agg.es <- aggte(example_attgt, type = "dynamic", na.rm = T)
summary(agg.es)




```

```{r, }
# end
```


